In [4]:
# Shared notebook bootstrap. The star import supplies names used without an
# explicit import further down (presumably DATA_PATH, os, pd, display, HTML --
# TODO confirm against setup_notebooks).
from setup_notebooks import *
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import sys
# Make modules stored under DATA_PATH importable; the membership check avoids
# appending a duplicate entry when the cell is re-run.
if DATA_PATH not in sys.path: sys.path.append(DATA_PATH)
# NOTE(review): `constants` is presumably a module located in DATA_PATH, which
# is why the sys.path tweak precedes this import -- confirm.
from constants import *

In [5]:
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 800)
pd.set_option('precision', 2)
%precision 4
%pprint


Pretty printing has been turned OFF

In [6]:
from gensim.models import Word2Vec

For now let's use the pretrained Google News word2vec model (300-dimensional word vectors).


In [8]:
# Load the pretrained Google News vectors (binary word2vec format, gzipped)
# from DATA_PATH/local into `wv`.
try:
    # gensim >= 1.0 provides the word2vec-format loader on KeyedVectors and
    # deprecates Word2Vec.load_word2vec_format, which fails in later releases.
    from gensim.models import KeyedVectors as _VectorLoader
except ImportError:
    # Older gensim releases only expose the loader on Word2Vec.
    _VectorLoader = Word2Vec

wv = _VectorLoader.load_word2vec_format(
    os.path.join(DATA_PATH, 'local', 'GoogleNews-vectors-negative300.bin.gz'),
    binary=True,
)

In [7]:
# Vector arithmetic: "python" + "snake" - "programming". The query steers
# away from the programming-language sense of "python".
wv.most_similar(
    positive=['python', 'snake'],
    negative=['programming'],
)


Out[7]:
[(u'snakes', 0.6297409534454346), (u'pythons', 0.6232618093490601), (u'lizard', 0.6179445385932922), (u'reptile', 0.6178215146064758), (u'crocodile', 0.6175473928451538), (u'poisonous_snake', 0.6041049957275391), (u'boa_constrictor', 0.604093611240387), (u'alligator', 0.603899359703064), (u'Burmese_python', 0.5941974520683289), (u'cobra', 0.5773065090179443)]

In [9]:
# Nearest neighbours of the single term "PyCon" in the embedding space.
wv.most_similar(
    positive=['PyCon'],
)


Out[9]:
[(u'EclipseCon', 0.6095224618911743), (u'SpringOne_2GX', 0.5996967554092407), (u'GridWorld', 0.5988301634788513), (u'WordCamp', 0.5986013412475586), (u'ZendCon', 0.5928680896759033), (u'linux.conf.au', 0.5917848348617554), (u'VSLive', 0.5901633501052856), (u'OSCON', 0.5854244232177734), (u'Parallels_Summit', 0.5811077356338501), (u'BEA_eWorld', 0.5797399282455444)]

In [15]:
# Vector arithmetic: "Portland" + "Oregon" - "city" - "government".
wv.most_similar(
    positive=['Portland', 'Oregon'],
    negative=['city', 'government'],
)


Out[15]:
[(u'Grants_Pass', 0.5147859454154968), (u'Willamette', 0.5049761533737183), (u'Pacific_Northwest', 0.4871968626976013), (u'Vancouver_Wash.', 0.48666539788246155), (u'Corvallis', 0.4861029386520386), (u'Seattle', 0.48276418447494507), (u'Eugene_Ore.', 0.47883060574531555), (u'Orgeon', 0.47867003083229065), (u'Boise', 0.4764174818992615), (u'Portland_Ore.', 0.47364598512649536)]